* Figure2prep.do  07/25/22
* Calculate residual demographics in annual CDC Wonder data (mcd x age x gender)
* use 2020 population for 2021, as CDC Wonder does
* the result is 23 years X 1 race X 2 genders X 4 ages (18+ is duplicative) X 9 Census divisions X 3 mcd codes = 4,968 observations, 8 of which are missing deaths

* Build the population series through 2021.
* Read PopSeriesAnnual.dta up to 2020, duplicate every 2020 row, and relabel the
* duplicates as 2021.  The result is sorted on the merge keys and written to
* tempfile fi21 (it also remains in memory until the next use).
tempfile fi21
tempvar copy2021
use "PopSeriesAnnual.dta" if year<=2020, clear
* copy2021 is 0 for original rows and 1 for the added duplicates
expand 2 if year==2020, generate(`copy2021')
replace year = 2021 if `copy2021'
drop `copy2021'
sort gender age race state place year
save `fi21', replace

* Load the death series restricted to: race All; gender Male or All; age bands
* 0+, 18+, 0-44, 45-64; underlying cause "Drug"; and the three multiple-cause
* codes listed.  Then attach the population series from tempfile fi21 with a
* match merge on the (sorted) key variables.
use "DeathSeriesAnnual.dta" ///
    if race=="All" ///
    & inlist(gender, `"{"Male"}"', "All") ///
    & inlist(age, "0+", "18+", "0-44", "45-64") ///
    & ucd=="Drug" ///
    & inlist(mcd, "T402T403", "T400T401T404", "Opioid"), clear
sort gender age race state place year
merge gender age race state place year using `fi21'
* The death rows were already restricted to race All above, so this can only
* remove rows contributed by the population file.
keep if race=="All"
* display the match counts before discarding the merge indicator
tab _merge
drop _merge

* recalculate 0+ records to be 65+
* The all-ages ("0+") row of each gender x mcdtitle x state x place x year cell is
* relabelled "65+" and replaced by the residual  total - (0-44) - (45-64).
* With S = total + (0-44) + (45-64), summed over the three non-"18+" rows of the
* cell, that residual equals 2*total - S.  "18+" rows are left out of the sums
* (their temporaries are missing) and are not modified.
* Temporaries are stored as double: gen/egen default to float, which cannot hold
* integers above 16,777,216 exactly, so any population count or sum beyond that
* size would be rounded before the subtraction.
replace age="65+" if age=="0+"
gen double tmppop = pop if age!="18+"
gen double tmpdeaths = deaths if age!="18+"
egen double tmpsum = sum(tmppop), by(gender mcdtitle state place year)
replace pop = 2*tmppop - tmpsum if age=="65+"
drop tmpsum
egen double tmpsum = sum(tmpdeaths), by(gender mcdtitle state place year)
* tmpnum = number of non-missing death counts among the three age rows in the cell
egen tmpnum = count(tmpdeaths), by(gender mcdtitle state place year)
replace deaths = 2*tmpdeaths - tmpsum if age=="65+"
* egen sum() treats missing values as zero, so blank out the residual explicitly
replace deaths = . if tmpnum<3 & age=="65+"   // missing a component of the sum
drop tmpsum tmpnum tmppop tmpdeaths

* recalculate All gender records to be Female
* The gender "All" row of each age x mcdtitle x state x place x year cell is
* relabelled Female and replaced by the residual  All - Male.  With
* S = All + Male summed over the cell, that residual equals 2*All - S.
* The sums are stored as double: egen defaults to float, which cannot hold
* integers above 16,777,216 exactly, so any population sum beyond that size
* would be rounded before the subtraction.
replace gender=`"{"Female"}"' if gender=="All"
egen double tmpsum = sum(pop), by(age mcdtitle state place year)
replace pop = 2*pop - tmpsum if gender==`"{"Female"}"'
drop tmpsum
egen double tmpsum = sum(deaths), by(age mcdtitle state place year)
* tmpnum = number of non-missing death counts among the rows in the cell
egen tmpnum = count(deaths), by(age mcdtitle state place year)
replace deaths = 2*deaths - tmpsum if gender==`"{"Female"}"'
* egen sum() treats missing values as zero, so blank out the residual explicitly
replace deaths = . if tmpnum<2 & gender==`"{"Female"}"'   // missing a component of the sum
drop tmpsum tmpnum
